Question 3: Which actors have been involved in the most conflicts since 1989?

Loading Data and Libraries

#clear environment
rm(list = ls())
options(warn=-1)

#loading libraries (must be installed before where necessary)
library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(network)
## 
## 'network' 1.17.1 (2021-06-12), part of the Statnet Project
## * 'news(package="network")' for changes since last version
## * 'citation("network")' for citation information
## * 'https://statnet.org' for help, support, and other information
library(igraph)
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:network':
## 
##     %c%, %s%, add.edges, add.vertices, delete.edges, delete.vertices,
##     get.edge.attribute, get.edges, get.vertex.attribute, is.bipartite,
##     is.directed, list.edge.attributes, list.vertex.attributes,
##     set.edge.attribute, set.vertex.attribute
## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
library(centiserve)
## Loading required package: Matrix
library(tidygraph)
## 
## Attaching package: 'tidygraph'
## The following object is masked from 'package:igraph':
## 
##     groups
## The following object is masked from 'package:stats':
## 
##     filter
library(networkD3)
library(visNetwork)
library(htmlwidgets)
## 
## Attaching package: 'htmlwidgets'
## The following object is masked from 'package:networkD3':
## 
##     JS
library(ggraph)
#Loading the dataset
ged211 <- read_csv("ged211.csv")
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double(),
##   relid = col_character(),
##   code_status = col_character(),
##   conflict_name = col_character(),
##   dyad_name = col_character(),
##   side_a = col_character(),
##   side_b = col_character(),
##   source_article = col_character(),
##   source_office = col_character(),
##   source_date = col_character(),
##   source_headline = col_character(),
##   source_original = col_character(),
##   where_coordinates = col_character(),
##   where_description = col_character(),
##   adm_1 = col_character(),
##   adm_2 = col_character(),
##   geom_wkt = col_character(),
##   country = col_character(),
##   region = col_character(),
##   date_start = col_datetime(format = ""),
##   date_end = col_datetime(format = "")
##   # ... with 1 more columns
## )
## ℹ Use `spec()` for the full column specifications.
head(ged211)

Network Analysis

Creating the network

First we need a usable dataframe

#Lets take each instance with a death toll over 0 and create a df with only side_a, side_b and total death toll
#remove type of violence =3 because civilians are not considered actors
over0 <- subset(ged211, best>0 & type_of_violence!=3)
over0 <- cbind.data.frame(over0$side_a,over0$side_b,over0$best)
colnames(over0) <- c("side_a","side_b","best")

#Grouping and totalling
gpd <- aggregate(over0$best, list(over0$side_a,over0$side_b), FUN=sum) 
colnames(gpd) <- c("side_a","side_b","total")
gpd <- gpd %>% arrange(desc(gpd$total))

#Look at the top values
head(gpd)

Then we need to get all the nodes, which are actors listed in either side_a or side_b

#All actors listed in side_a
sides_a <- gpd %>%
  distinct(side_a)

#All actors listed in side_b
sides_b <- gpd %>%
  distinct(side_b)

colnames(sides_a) <- c("sides")
colnames(sides_b) <- c("sides")

#Defining the nodes
nodes <- full_join(sides_a, sides_b, by = "sides")

#Defining the edges
edges <- gpd %>% 
  left_join(nodes, by = c("side_a" = "sides"))

edges <- edges %>% 
  left_join(nodes, by = c("side_b" = "sides")) 

Now we can create the network

#Bidirectional so multiple set to true
routes_network <- network(edges, vertex.attr = nodes, matrix.type = "edgelist",ignore.eval = FALSE, multiple=TRUE)

#Checking we have created a network
class(routes_network)
## [1] "network"
routes_network
##  Network attributes:
##   vertices = 1325 
##   directed = TRUE 
##   hyper = FALSE 
##   loops = FALSE 
##   multiple = TRUE 
##   bipartite = FALSE 
##   total edges= 1212 
##     missing edges= 0 
##     non-missing edges= 1212 
## 
##  Vertex attribute names: 
##     sides vertex.names 
## 
##  Edge attribute names not shown

Now the network can be plotted

plot(routes_network, vertex.cex = 0.8, main="Network Structure")

Although this gives us a little bit of insight into the structure, it is not very meaningful by itself, so let’s work with an igraph

routes_igraph <- graph_from_data_frame(d = edges, vertices = nodes, directed = TRUE)
routes_igraph
## IGRAPH 2c39d74 DN-- 1325 1212 -- 
## + attr: name (v/c), total (e/n)
## + edges from 2c39d74 (vertex names):
##  [1] Government of Syria      ->Syrian insurgents     
##  [2] Government of Afghanistan->Taleban               
##  [3] Government of Eritrea    ->Government of Ethiopia
##  [4] Government of Iraq       ->IS                    
##  [5] Government of Sri Lanka  ->LTTE                  
##  [6] Government of Syria      ->IS                    
##  [7] Government of Ethiopia   ->EPLF                  
##  [8] Government of Ethiopia   ->EPRDF                 
## + ... omitted several edges

Using this network, we can easily query information on whether two actors are in conflict

are.connected(routes_igraph,"Government of India","Government of Pakistan") #TRUE
## [1] TRUE
are.connected(routes_igraph,"Government of Syria","IS") #TRUE
## [1] TRUE
are.connected(routes_igraph,"Government of Syria","Government of Pakistan") #FALSE
## [1] FALSE

We can also display the network as an edge list for each actor

(as_adj_edge_list(routes_igraph))$`Government of Syria`
## + 4/1212 edges from 2c39d74 (vertex names):
## [1] Government of Syria->IS               
## [2] Government of Syria->Syrian insurgents
## [3] Government of Syria->SDF              
## [4] Government of Syria->PYD
routes_network
##  Network attributes:
##   vertices = 1325 
##   directed = TRUE 
##   hyper = FALSE 
##   loops = FALSE 
##   multiple = TRUE 
##   bipartite = FALSE 
##   total edges= 1212 
##     missing edges= 0 
##     non-missing edges= 1212 
## 
##  Vertex attribute names: 
##     sides vertex.names 
## 
##  Edge attribute names not shown
V(routes_igraph)$all <- degree(routes_igraph, mode = "all")

#Which actors have more than 10 times the median number of edges (most interconnected vertices)?
V(routes_igraph)[V(routes_igraph)$all > 10*median(V(routes_igraph)$all)]
## + 15/1325 vertices, named, from 2c39d74:
##  [1] Government of Afghanistan      Government of Ethiopia        
##  [3] Government of Sudan            Government of Pakistan        
##  [5] IS                             Government of India           
##  [7] Government of Philippines      Government of DR Congo (Zaire)
##  [9] Government of Myanmar (Burma)  Government of Chad            
## [11] Fulani                         Government of Mali            
## [13] Borana                         SDF                           
## [15] PYD

Degree Distribution

What does the distribution of edges look like

#Minimum number of edges in network
min(degree(routes_igraph))
## [1] 1
#Maximum number of edges in network
max(degree(routes_igraph))
## [1] 101
#Boxplot with distr of edges
boxplot(V(routes_igraph)$all, outline=FALSE, main="Boxplot of Edges")

#Vertex degree distr
hist(V(routes_igraph)$all, breaks=100, main="Degree Distribution of Organized Violence",xlab="# of Edges")

degree_distribution(routes_igraph)
##   [1] 0.000000000 0.724528302 0.129811321 0.055849057 0.029433962 0.018113208
##   [7] 0.011320755 0.006792453 0.004528302 0.004528302 0.003773585 0.002264151
##  [13] 0.001509434 0.000754717 0.001509434 0.000754717 0.000000000 0.000754717
##  [19] 0.000754717 0.000754717 0.000000000 0.000000000 0.000000000 0.000000000
##  [25] 0.000754717 0.000754717 0.000000000 0.000000000 0.000000000 0.000000000
##  [31] 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
##  [37] 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
##  [43] 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
##  [49] 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
##  [55] 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
##  [61] 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
##  [67] 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
##  [73] 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
##  [79] 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
##  [85] 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
##  [91] 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
##  [97] 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 0.000754717
#For degree_distribution a numeric vector of the same length as the maximum degree plus one. The first element is the relative frequency zero degree vertices, the second vertices with degree one, etc.

pie(degree_distribution(routes_igraph),main="Number of Edges")

#Number of vertices
gorder(routes_igraph)
## [1] 1325
#Number of edges
gsize(routes_igraph)
## [1] 1212
#Reading degrees of actors
head(degree(routes_igraph))
##       Government of Syria Government of Afghanistan     Government of Eritrea 
##                         4                        12                         3 
##        Government of Iraq   Government of Sri Lanka    Government of Ethiopia 
##                         9                         3                        13

Now lets look at the actors that are in conflict with the most other actors

#Dataframe with number of degrees, ordered
degs <- as.data.frame(degree(routes_igraph))
colnames(degs) <- c("Degree")
head(degs[order(-degs$Degree),])
## [1] 101  25  24  19  18  17
#Highest degrees
most_degs <- subset(degs,degs>12)
most_degs

Density

#Lets take each instance with a death toll over 0 and create a df with only side_a, side_b and total death toll
#remove type of violence =3 because civilians are not considered actors

#Three empty dataframes to be filled later
densities <- data.frame("Year" = 1989:2020, "Density" = NA)
latoras <- data.frame("Year" = 1989:2020, "Closeness" = NA)
meandeg <- data.frame("Year" = 1989:2020, "Degrees" = NA)
vertices_cnt <- data.frame("Year" = 1989:2020, "Nodes" = NA)
edges_cnt <- data.frame("Year" = 1989:2020, "Edges" = NA)

#Loop that calculates scores for the above dataframes for each year
#https://www.rdocumentation.org/packages/centiserve/versions/1.0.0
for (i in densities$Year) {
  
over0 <- subset(ged211, best>0 & type_of_violence!=3 & year==i)
over0 <- cbind.data.frame(over0$side_a,over0$side_b,over0$best)
colnames(over0) <- c("side_a","side_b","best")

#Grouping and totalling
gpd <- aggregate(over0$best, list(over0$side_a,over0$side_b), FUN=sum) 
colnames(gpd) <- c("side_a","side_b","total")
gpd <- gpd %>% arrange(desc(gpd$total))

#Look at the top values
head(gpd)


#All actors listed in side_a
sides_a <- gpd %>%
  distinct(side_a)

#All actors listed in side_b
sides_b <- gpd %>%
  distinct(side_b)

colnames(sides_a) <- c("sides")
colnames(sides_b) <- c("sides")

#Defining the nodes
nodes <- full_join(sides_a, sides_b, by = "sides")

#Defining the edges
edges <- gpd %>% 
  left_join(nodes, by = c("side_a" = "sides"))

edges <- edges %>% 
  left_join(nodes, by = c("side_b" = "sides")) 

routes_igraph <- graph_from_data_frame(d = edges, vertices = nodes, directed = TRUE)

densities$Density[densities$Year==i] <- edge_density(routes_igraph, loops = FALSE)
latoras$Closeness[latoras$Year==i] <- closeness.latora(routes_igraph)
meandeg$Degrees[meandeg$Year==i] <-mean(degree(routes_igraph))
vertices_cnt$Nodes[meandeg$Year==i] <-length(V(routes_igraph))
edges_cnt$Edges[meandeg$Year==i] <-length(E(routes_igraph))
}
densities
latoras
meandeg
plot(densities, main="Network Density over Time")

plot(latoras, main="Network Closeness over Time") #sum of inversed distances to all other nodes instead of the inversed of the sum of distances to all other nodes

plot(meandeg, main="Mean Degrees over Time")

plot(vertices_cnt, main="Node Count over Time")

plot(edges_cnt, main="Edge Count over Time")

#Tidy and arrange

routes_tidy <- tbl_graph(nodes = nodes, edges = edges, directed = TRUE)
routes_igraph_tidy <- as_tbl_graph(routes_igraph)

routes_tidy %>% 
  activate(edges) %>% 
  arrange(desc(total))
## # A tbl_graph: 255 nodes and 211 edges
## #
## # A directed acyclic simple graph with 56 components
## #
## # Edge Data: 211 x 3 (active)
##    from    to total
##   <int> <int> <dbl>
## 1     1   127 20102
## 2     2   128  7633
## 3     3   129  4455
## 4     3   130  4362
## 5     4   131  3521
## 6     5    27  2330
## # … with 205 more rows
## #
## # Node Data: 255 x 1
##   sides                        
##   <chr>                        
## 1 Government of Afghanistan    
## 2 Government of Azerbaijan     
## 3 Jalisco Cartel New Generation
## # … with 252 more rows
routes_tidy
## # A tbl_graph: 255 nodes and 211 edges
## #
## # A directed acyclic simple graph with 56 components
## #
## # Node Data: 255 x 1 (active)
##   sides                            
##   <chr>                            
## 1 Government of Afghanistan        
## 2 Government of Azerbaijan         
## 3 Jalisco Cartel New Generation    
## 4 Government of Syria              
## 5 Government of Yemen (North Yemen)
## 6 Comando Vermelho                 
## # … with 249 more rows
## #
## # Edge Data: 211 x 3
##    from    to total
##   <int> <int> <dbl>
## 1     1   127 20102
## 2     2   128  7633
## 3     3   129  4455
## # … with 208 more rows
routes_tidy %>% 
  activate(nodes) %>% 
  mutate(degree = centrality_degree(mode = 'total'))
## # A tbl_graph: 255 nodes and 211 edges
## #
## # A directed acyclic simple graph with 56 components
## #
## # Node Data: 255 x 2 (active)
##   sides                             degree
##   <chr>                              <dbl>
## 1 Government of Afghanistan              3
## 2 Government of Azerbaijan               1
## 3 Jalisco Cartel New Generation          7
## 4 Government of Syria                    3
## 5 Government of Yemen (North Yemen)      3
## 6 Comando Vermelho                       2
## # … with 249 more rows
## #
## # Edge Data: 211 x 3
##    from    to total
##   <int> <int> <dbl>
## 1     1   127 20102
## 2     2   128  7633
## 3     3   129  4455
## # … with 208 more rows
routes_tidy %>% 
  activate(nodes)%>% 
  mutate(degree = centrality_degree(mode = 'total'))  %>% 
  ggraph('nicely') + 
  geom_node_point(aes(size = degree)) +
  geom_edge_link()

routes_tidy %>% 
    mutate(centrality = centrality_authority()) %>% 
    ggraph(layout = 'kk') + 
    geom_edge_link() + 
    geom_node_point(aes(colour = centrality)) + 
    scale_color_continuous(guide = 'legend') + 
    theme_graph()

As we can see, the edges are not weighted

is.weighted(routes_igraph)
## [1] FALSE

So let’s weight by the total deaths in for conflict

#Tidy and arrange
#tidygraph
routes_tidy <- tbl_graph(nodes = nodes, edges = edges, directed = TRUE)
routes_igraph_tidy <- as_tbl_graph(routes_igraph)

routes_tidy %>% 
  activate(edges) %>% 
  arrange(desc(total))
## # A tbl_graph: 255 nodes and 211 edges
## #
## # A directed acyclic simple graph with 56 components
## #
## # Edge Data: 211 x 3 (active)
##    from    to total
##   <int> <int> <dbl>
## 1     1   127 20102
## 2     2   128  7633
## 3     3   129  4455
## 4     3   130  4362
## 5     4   131  3521
## 6     5    27  2330
## # … with 205 more rows
## #
## # Node Data: 255 x 1
##   sides                        
##   <chr>                        
## 1 Government of Afghanistan    
## 2 Government of Azerbaijan     
## 3 Jalisco Cartel New Generation
## # … with 252 more rows
routes_tidy %>% 
  activate(nodes) %>% 
  mutate(degree = centrality_degree(mode = 'total'))
## # A tbl_graph: 255 nodes and 211 edges
## #
## # A directed acyclic simple graph with 56 components
## #
## # Node Data: 255 x 2 (active)
##   sides                             degree
##   <chr>                              <dbl>
## 1 Government of Afghanistan              3
## 2 Government of Azerbaijan               1
## 3 Jalisco Cartel New Generation          7
## 4 Government of Syria                    3
## 5 Government of Yemen (North Yemen)      3
## 6 Comando Vermelho                       2
## # … with 249 more rows
## #
## # Edge Data: 211 x 3
##    from    to total
##   <int> <int> <dbl>
## 1     1   127 20102
## 2     2   128  7633
## 3     3   129  4455
## # … with 208 more rows
head(E(routes_tidy)$total)
## [1] 20102  7633  4455  4362  3521  2330
routes_tidy <- set_edge_attr(routes_tidy, "weight", value= E(routes_tidy)$total)
is_weighted(routes_tidy)
## [1] TRUE
E(routes_tidy)[E(routes_tidy)$weight>0]
## + 211/211 edges from b57cdfb:
##  [1]  1->127  2->128  3->129  3->130  4->131  5-> 27  3->132  6->133  7-> 92
## [10]  3->134  8-> 20  9->135  3->136  4-> 20 10->129 11-> 20 12-> 20 13->100
## [19] 14-> 20  3->137 15-> 20 16->103 17->138 18->113  8->117 19->139 20->103
## [28] 21->140 22->103 23-> 20  1-> 20 24-> 30 25->141 26->142 27->143 28->144
## [37] 29->145 30->146 31->147 32->148  6->149 25-> 31 22-> 20 33->150 25->151
## [46] 34-> 20 35->144 20->138 36->137 37->152 31->153 38->154 39->155 40->103
## [55] 41-> 39 32->156 42->157 43-> 92 44->158  3->159 14-> 49 45->160 46-> 42
## [64] 47->161 48->162 32-> 20 14-> 51 49-> 42 37->163 50->164 51->165 52->142
## [73] 14->166 53->167 16-> 20 51->168 54->169 55->142 56-> 95 57->170 53->171
## [82]  1->172 58->103 34->173 59->174 59->175 14->176 60->177 61->178 62->179
## + ... omitted several edges
nodes[c(652,130,6,14,240,14,653,654,37,285,655,656,657),]
##  [1] NA                               "Santa Rosa de Lima Cartel"     
##  [3] "Comando Vermelho"               "Government of DR Congo (Zaire)"
##  [5] "Egbura Mozum"                   "Government of DR Congo (Zaire)"
##  [7] NA                               NA                              
##  [9] "Government of Ukraine"          NA                              
## [11] NA                               NA                              
## [13] NA
nodes[1:10,]
##  [1] "Government of Afghanistan"         "Government of Azerbaijan"         
##  [3] "Jalisco Cartel New Generation"     "Government of Syria"              
##  [5] "Government of Yemen (North Yemen)" "Comando Vermelho"                 
##  [7] "Government of Somalia"             "Government of Nigeria"            
##  [9] "Government of Ethiopia"            "Juarez Cartel"
#Fraction of deaths due to actors in most conflicts and deadliest conflicts

top10actors <- subset(E(routes_tidy), V(routes_tidy)$sides %in% c("IS","Fulani","Government of India","Government of Chad","Government of Myanmar (Burma)","Government of DR Congo (Zaire)", "Government of Sudan","Government of Pakistan","Government of Mali","Government of Ethiopia"))

sum(top10actors$total)/(sum(E(routes_tidy)$total))
## [1] 0.08274523
#10 deadliest dyads
sum(E(routes_tidy)[1:10]$total)/(sum(E(routes_tidy)$total))
## [1] 0.687182
pie(top10actors$total)

pie(E(routes_tidy)[1:10]$total)

head(closeness.latora(routes_tidy, vids = V(routes_tidy))) #Latora centrality for disconnected graphs
## [1] 0.1518159541 0.0001310101 0.0382130330 0.3915847692 0.8908332205
## [6] 0.0065505620

Using this we can see what the distribution of closeness looks like

boxplot(closeness.latora(routes_tidy, vids = V(routes_tidy)), main="Closeness Distribution")

#library(ggraph)
#ggraph(routes_tidy, layout = "graphopt") +   geom_node_point() +  geom_edge_link(aes(width = "weight"), alpha = 0.8) +   scale_edge_width(range = c(0.1, 3)) +  geom_node_text(aes(label = sides), repel = TRUE) +  labs(edge_width = "weight") +  theme_graph()

Interactive Plot with visNetwork and D3forcenetworklibrary

Jesse Sadler. (2017, October 25). Introduction to Network Analysis with R. Retrieved from https://www.jessesadler.com/post/network-analysis-with-r/

Bradley, B. (2021, May 25). From igraph to visNetwork. Retrieved from https://towardsdatascience.com/from-igraph-to-visnetwork-7bc5a76fdeec

visIgraph(routes_igraph)
visIgraph(routes_igraph) %>% visOptions(nodesIdSelection = TRUE,  highlightNearest = TRUE)
library(networkD3)
library(visNetwork)

#This finds the subclusters
sc <- cluster_walktrap(routes_igraph)
members <- membership(sc)

#Converting from igraph to a d3 network type
plt_d3 <- igraph_to_networkD3(routes_igraph, group = members)

#This creates the plot
pltint <- forceNetwork(Links = plt_d3$links, Nodes =plt_d3$nodes,
             Source = 'source', Target = 'target', NodeID = 'name',Group = 'group', fontSize = 30, opacity = 1, zoom = TRUE)

pltint
#This saves the plot as an html file
saveWidget(pltint, "network.html")

Sources Used

For the interactive plots:

Jesse Sadler. (2017, October 25). Introduction to Network Analysis with R. Retrieved from https://www.jessesadler.com/post/network-analysis-with-r/

Bradley, B. (2021, May 25). From igraph to visNetwork. Retrieved from https://towardsdatascience.com/from-igraph-to-visnetwork-7bc5a76fdeec